
#Raking


#Dependencies

rm(list = ls()) #Clean environment: removes all (non-hidden) objects from the current environment before anything else runs

# Packages attached for the session. The functions visible below call
# select/%>% (available through dplyr/tidyverse) and svydesign/rake (survey).
library(dplyr)
library(tidyverse)
library(survey)
library(reshape2)
library(ggplot2)
library(grid)
library(gridExtra)

#External functions

# Parses functions.R and evaluates it in the current environment.
# NOTE(review): presumably this is where fn.age, fn.education, fn.gender and
# fn.regions (used by fn.cleaner) are defined - confirm.
eval(parse("functions.R", encoding="UTF-8"))
# NOTE(review): presumably provides the census quotas handed to the raking
# functions as list_of_quotas - confirm against Quotas.R.
source("Quotas.R",encoding = "utf-8")

# Cleaner: applies the recoding helpers from functions.R to one country's
# survey data, in the order age -> education -> gender -> regions.
# Each helper receives the result of the previous one; the value returned is
# whatever the last helper (fn.regions) produces.
fn.cleaner <- function(dataframe){
  recoded <- fn.regions(
    fn.gender(
      fn.education(
        fn.age(dataframe)
      )
    )
  )
  return(recoded)
}

# Drop incomplete observations. The raking algorithm cannot handle missing
# values, so every row with a missing REGION_0, gender, age or education is
# removed. A value counts as missing when it is a real NA or the literal
# string "NA".
#
# dataframe: survey data containing at least the columns id, REGION_0, age,
#            gender and education.
#
# Returns the complete rows of `dataframe`, restricted to the columns
# id, REGION_0, age, gender and education (in that order).
# Stops with an error when one of those columns is absent.
fn.dropNA <- function(dataframe){
  kept_columns <- c("id", "REGION_0", "age", "gender", "education")
  checked_columns <- c("REGION_0", "gender", "age", "education")

  absent <- setdiff(kept_columns, names(dataframe))
  if (length(absent) > 0) {
    stop("fn.dropNA: missing required column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # Real NAs must be tested with is.na(): `x == "NA"` evaluates to NA for
  # them, and indexing rows with NA inserts all-NA placeholder rows instead
  # of dropping anything.
  is_missing <- function(x) is.na(x) | as.character(x) == "NA"

  # One logical mask over all checked columns, applied in a single subset.
  incomplete <- Reduce(
    `|`,
    lapply(checked_columns, function(column) is_missing(dataframe[[column]]))
  )

  dataframe <- dataframe[!incomplete, kept_columns, drop = FALSE]
  return(dataframe)
}


# Rake one country's survey to the census quotas.
# Applies fn.dropNA first, then runs the raking algorithm of the survey
# package on the remaining observations.
#
# dataframe:         survey data accepted by fn.dropNA.
# list_of_variables: passed to rake() as its sample margins.
# list_of_quotas:    passed to rake() as its population margins (the census
#                    quotas).
#
# Returns the data frame produced by fn.dropNA with an added `weights`
# column holding the raked weight of each remaining observation.
fn.rake <- function(dataframe, list_of_variables, list_of_quotas){
  dataframe <- fn.dropNA(dataframe)
  dsurvey <- svydesign(id = ~id, data = dataframe)
  dsurveyr <- rake(dsurvey, list_of_variables, list_of_quotas)

  # Read the weights through the public accessor. The `postStrata` element of
  # the design is internal bookkeeping of the survey package, and its first
  # entry belongs to a single margin adjustment rather than to the finished
  # raking, so it is not a reliable source for the final weights.
  raked_weights <- as.numeric(weights(dsurveyr))

  dataframe$weights <- raked_weights
  return(dataframe)
}


# Attach weights to every surveyed id, including the observations that were
# dropped before raking because of missing values.
#
# dataframe:         survey data with an `id` column, accepted by fn.rake.
# list_of_variables: passed unchanged to fn.rake.
# list_of_quotas:    passed unchanged to fn.rake.
#
# Returns a data frame with the columns id and weights. merge() sorts the
# rows by id. Observations without a raked weight receive 100 / (number of
# rows); afterwards all weights are rescaled so that they sum to 100.
fn.merge <- function(dataframe, list_of_variables, list_of_quotas){
  data_w <- fn.rake(dataframe, list_of_variables, list_of_quotas)

  # Join only the columns that are needed. Merging the full frames would
  # suffix every shared column (.x/.y), so an input that already carries a
  # `weights` column would leave no plain `weights` column to select.
  Survey_with_weights <- merge(
    dataframe["id"],
    data_w[c("id", "weights")],
    by = "id",
    all.x = TRUE
  )

  number_of_rows <- nrow(Survey_with_weights)
  not_raked <- is.na(Survey_with_weights$weights)

  # NOTE(review): the fill value 100 / n is only an "average" weight when the
  # raked weights themselves sum to roughly 100 (quotas given as
  # percentages) - confirm against Quotas.R.
  Survey_with_weights$weights[not_raked] <- 100 / number_of_rows

  weight_total <- sum(Survey_with_weights$weights)
  Survey_with_weights$weights <- (Survey_with_weights$weights / weight_total) * 100
  return(Survey_with_weights)
}

